library(tidyverse) # tidy, manipulate and plot data
df <- read_csv(here("data" ,"AqFl2_qPCR_ref_PI.csv"), col_names = T, na = "")
str(df)
Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 144 obs. of 8 variables:
$ Sample_ID: num 7 8 9 10 11 12 13 14 15 16 ...
$ Diet : chr "IM" "IM" "IM" "IM" ...
$ Net_pen : num 110 110 110 110 110 110 111 111 111 111 ...
$ Ref_gene : chr "gapdh" "gapdh" "gapdh" "gapdh" ...
$ PE : num 1.9 1.9 1.9 1.9 1.9 1.9 1.9 1.9 1.9 1.9 ...
$ Cq : num 18.7 18.2 18.3 18.1 18.2 ...
$ Cq_error : num 0.04 0 0.02 0.04 0.06 0.02 0.04 0.06 0.06 0.01 ...
$ Comments : chr NA NA NA NA ...
- attr(*, "spec")=
.. cols(
.. Sample_ID = col_double(),
.. Diet = col_character(),
.. Net_pen = col_double(),
.. Ref_gene = col_character(),
.. PE = col_double(),
.. Cq = col_double(),
.. Cq_error = col_double(),
.. Comments = col_character()
.. )
# Tidy data
df <- df %>%
arrange(Ref_gene, desc(Diet)) %>%
mutate(Quantity = `^`(PE, -Cq)) %>% # calculate mRNA quantity
group_by(Ref_gene, Diet) %>%
mutate(Mean = mean(Quantity), SD = sd(Quantity)) %>% # calculate average mRNA quantity and sd
ungroup() %>%
group_by(Ref_gene) %>%
mutate(Quantity_std = scale(Quantity), # standardize mRNA quantity of each reference gene
Quantity_nm = Quantity / Quantity[1]) # normalize mRNA quantity to the first sample
# Convert Sample_ID and Net_pen to character variables
df <- within(df, {
Sample_ID <- as.character(Sample_ID)
Net_pen <- as.character(Net_pen)
})
# Convert Diet and Sample_ID to factors, specifying the desired orders for plotting
df$Diet <- factor(df$Diet, levels = c("REF", "IM"))
df$Sample_ID <- factor(df$Sample_ID,
levels = unique(df$Sample_ID)) # use the exact order of Sample_ID column
As packages providing descriptive statistics summarize data by columns, we’ll convert the dataframe from the “long” to “wide” using the dcast() function from the data.table package.
library(data.table)
# Convert dataframe to a data.table object
dt <- as.data.table(df)
# Transform data.table from long to wide
df_wider <- dcast(dt, Sample_ID + Diet + Net_pen ~ Ref_gene, value.var = colnames(dt)[5:13])
library(summarytools)
print(dfSummary(df_wider, valid.col = FALSE, graph.magnif = 0.75), max.tbl.height = 800, method = "render")
| Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | ||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Sample_ID [factor] | 1. 13 2. 14 3. 15 4. 16 5. 17 6. 18 7. 25 8. 26 9. 27 10. 28 [ 26 others ] |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Diet [factor] | 1. REF 2. IM |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Net_pen [character] | 1. 101 2. 104 3. 105 4. 106 5. 110 6. 111 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| PE_actb [numeric] | 1 distinct value |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| PE_gapdh [numeric] | 1 distinct value |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| PE_hprt1 [numeric] | 1 distinct value |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| PE_rnapo2 [numeric] | 1 distinct value |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_actb [numeric] | Mean (sd) : 14.7 (0.2) min < med < max: 14.4 < 14.7 < 15.3 IQR (CV) : 0.3 (0) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_gapdh [numeric] | Mean (sd) : 18.1 (0.2) min < med < max: 17.8 < 18.1 < 18.7 IQR (CV) : 0.3 (0) | 29 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_hprt1 [numeric] | Mean (sd) : 22.2 (0.3) min < med < max: 21.6 < 22.2 < 23.1 IQR (CV) : 0.3 (0) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_rnapo2 [numeric] | Mean (sd) : 24 (0.2) min < med < max: 23.6 < 23.9 < 24.7 IQR (CV) : 0.3 (0) | 28 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_error_actb [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0.1 IQR (CV) : 0 (0.7) | 11 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_error_gapdh [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0.2 IQR (CV) : 0 (0.9) | 11 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_error_hprt1 [numeric] | Mean (sd) : 0.1 (0.1) min < med < max: 0 < 0.1 < 0.2 IQR (CV) : 0.1 (0.7) | 17 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Cq_error_rnapo2 [numeric] | Mean (sd) : 0.1 (0.1) min < med < max: 0 < 0.1 < 0.2 IQR (CV) : 0.1 (0.8) | 15 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Comments_actb [character] | 1. Calibrated_Cq_from_run_2 |
|
35 (97.22%) | |||||||||||||||||||||||||||||||||||||||||||||
| Comments_gapdh [character] | 1. Calibrated_Cq_from_run_2 |
|
35 (97.22%) | |||||||||||||||||||||||||||||||||||||||||||||
| Comments_hprt1 [character] | 1. Calibrated_Cq_from_run_2 |
|
35 (97.22%) | |||||||||||||||||||||||||||||||||||||||||||||
| Comments_rnapo2 [character] | 1. Calibrated_Cq_from_run_2 |
|
35 (97.22%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_actb [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0 IQR (CV) : 0 (0.1) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_gapdh [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0 IQR (CV) : 0 (0.1) | 29 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_hprt1 [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0 IQR (CV) : 0 (0.2) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_rnapo2 [numeric] | Mean (sd) : 0 (0) min < med < max: 0 < 0 < 0 IQR (CV) : 0 (0.1) | 28 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Mean_actb [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Mean_gapdh [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Mean_hprt1 [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Mean_rnapo2 [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| SD_actb [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| SD_gapdh [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| SD_hprt1 [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| SD_rnapo2 [numeric] | Min : 0 Mean : 0 Max : 0 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_std_actb [numeric] | Mean (sd) : 0 (1) min < med < max: -2.4 < -0.1 < 1.6 IQR (CV) : 1.6 (2798353166521473) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_std_gapdh [numeric] | Mean (sd) : 0 (1) min < med < max: -2.5 < -0.1 < 1.6 IQR (CV) : 1.5 (3578176847952066) | 29 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_std_hprt1 [numeric] | Mean (sd) : 0 (1) min < med < max: -2.3 < 0 < 2.4 IQR (CV) : 1.1 (5862571499920934) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_std_rnapo2 [numeric] | Mean (sd) : 0 (1) min < med < max: -2.9 < 0.1 < 1.9 IQR (CV) : 1.4 (1897607103291092) | 28 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_nm_actb [numeric] | Mean (sd) : 1.1 (0.1) min < med < max: 0.8 < 1.1 < 1.4 IQR (CV) : 0.2 (0.1) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_nm_gapdh [numeric] | Mean (sd) : 1.1 (0.1) min < med < max: 0.7 < 1.1 < 1.3 IQR (CV) : 0.2 (0.1) | 29 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_nm_hprt1 [numeric] | Mean (sd) : 0.9 (0.2) min < med < max: 0.5 < 0.9 < 1.4 IQR (CV) : 0.2 (0.2) | 30 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
| Quantity_nm_rnapo2 [numeric] | Mean (sd) : 1 (0.1) min < med < max: 0.6 < 1 < 1.2 IQR (CV) : 0.2 (0.1) | 28 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||||
library(skimr)
library(kableExtra) # Format table
skim_to_wide(df_wider) %>%
kable(align = "l") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| type | variable | missing | complete | n | min | max | empty | n_unique | top_counts | ordered | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| character | Comments_actb | 35 | 1 | 36 | 24 | 24 | 0 | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| character | Comments_gapdh | 35 | 1 | 36 | 24 | 24 | 0 | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| character | Comments_hprt1 | 35 | 1 | 36 | 24 | 24 | 0 | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| character | Comments_rnapo2 | 35 | 1 | 36 | 24 | 24 | 0 | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| character | Net_pen | 0 | 36 | 36 | 3 | 3 | 0 | 6 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| factor | Diet | 0 | 36 | 36 | NA | NA | NA | 2 | REF: 18, IM: 18, NA: 0 | FALSE | NA | NA | NA | NA | NA | NA | NA | NA |
| factor | Sample_ID | 0 | 36 | 36 | NA | NA | NA | 36 | 13: 1, 14: 1, 15: 1, 16: 1 | FALSE | NA | NA | NA | NA | NA | NA | NA | NA |
| numeric | Cq_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 14.73 | 0.22 | 14.4 | 14.55 | 14.75 | 14.88 | 15.32 | ▇▂▅▇▅▃▁▁ |
| numeric | Cq_error_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.05 | 0.036 | 0.01 | 0.02 | 0.04 | 0.07 | 0.12 | ▇▂▆▂▂▁▁▅ |
| numeric | Cq_error_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.04 | 0.037 | 0 | 0.017 | 0.03 | 0.05 | 0.19 | ▇▇▃▁▁▁▁▁ |
| numeric | Cq_error_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.074 | 0.055 | 0 | 0.03 | 0.06 | 0.11 | 0.19 | ▇▇▇▆▃▂▃▃ |
| numeric | Cq_error_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.061 | 0.052 | 0 | 0.02 | 0.055 | 0.08 | 0.25 | ▇▇▅▂▁▁▁▁ |
| numeric | Cq_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 18.11 | 0.21 | 17.79 | 17.93 | 18.12 | 18.24 | 18.73 | ▇▅▇▇▃▂▁▁ |
| numeric | Cq_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 22.22 | 0.31 | 21.59 | 22.06 | 22.19 | 22.41 | 23.11 | ▂▃▆▅▇▂▁▁ |
| numeric | Cq_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 23.95 | 0.21 | 23.58 | 23.8 | 23.91 | 24.08 | 24.68 | ▂▆▆▇▃▁▁▁ |
| numeric | Mean_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.00011 | 6.5e-07 | 0.00011 | 0.00011 | 0.00011 | 0.00011 | 0.00011 | ▇▁▁▁▁▁▁▇ |
| numeric | Mean_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 9e-06 | 1.6e-07 | 8.9e-06 | 8.9e-06 | 9e-06 | 9.2e-06 | 9.2e-06 | ▇▁▁▁▁▁▁▇ |
| numeric | Mean_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 2.1e-07 | 4.6e-09 | 2.1e-07 | 2.1e-07 | 2.1e-07 | 2.1e-07 | 2.1e-07 | ▇▁▁▁▁▁▁▇ |
| numeric | Mean_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 7.7e-07 | 6.4e-10 | 7.7e-07 | 7.7e-07 | 7.7e-07 | 7.7e-07 | 7.7e-07 | ▇▁▁▁▁▁▁▇ |
| numeric | PE_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.86 | 0 | 1.86 | 1.86 | 1.86 | 1.86 | 1.86 | ▁▁▁▇▁▁▁▁ |
| numeric | PE_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.9 | 0 | 1.9 | 1.9 | 1.9 | 1.9 | 1.9 | ▁▁▁▇▁▁▁▁ |
| numeric | PE_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 2 | 0 | 2 | 2 | 2 | 2 | 2 | ▁▁▁▇▁▁▁▁ |
| numeric | PE_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.8 | 0 | 1.8 | 1.8 | 1.8 | 1.8 | 1.8 | ▁▁▁▇▁▁▁▁ |
| numeric | Quantity_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.00011 | 1.4e-05 | 7.4e-05 | 9.7e-05 | 0.00011 | 0.00012 | 0.00013 | ▁▁▅▆▇▅▃▇ |
| numeric | Quantity_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 9e-06 | 1.2e-06 | 6e-06 | 8.2e-06 | 8.9e-06 | 1e-05 | 1.1e-05 | ▁▂▂▇▆▆▅▆ |
| numeric | Quantity_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 2.1e-07 | 4.4e-08 | 1.1e-07 | 1.8e-07 | 2.1e-07 | 2.3e-07 | 3.2e-07 | ▁▂▇▅▅▃▁▂ |
| numeric | Quantity_nm_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.12 | 0.15 | 0.77 | 1.01 | 1.1 | 1.24 | 1.36 | ▁▁▅▆▇▅▃▇ |
| numeric | Quantity_nm_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.1 | 0.15 | 0.73 | 1 | 1.08 | 1.22 | 1.33 | ▁▂▂▇▆▆▅▆ |
| numeric | Quantity_nm_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.93 | 0.2 | 0.49 | 0.8 | 0.93 | 1.01 | 1.4 | ▁▂▇▅▅▃▁▂ |
| numeric | Quantity_nm_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 0.98 | 0.12 | 0.64 | 0.9 | 1 | 1.07 | 1.21 | ▁▁▂▇▃▇▅▂ |
| numeric | Quantity_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 7.7e-07 | 9.4e-08 | 5e-07 | 7.1e-07 | 7.9e-07 | 8.4e-07 | 9.6e-07 | ▁▁▂▇▃▇▅▂ |
| numeric | Quantity_std_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 3.6e-16 | 1 | -2.39 | -0.76 | -0.14 | 0.8 | 1.65 | ▁▁▅▆▇▅▃▇ |
| numeric | Quantity_std_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 2.8e-16 | 1 | -2.51 | -0.67 | -0.12 | 0.85 | 1.62 | ▁▂▂▇▆▆▅▆ |
| numeric | Quantity_std_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.7e-16 | 1 | -2.25 | -0.68 | 0.00038 | 0.43 | 2.43 | ▁▂▇▅▅▃▁▂ |
| numeric | Quantity_std_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 5.3e-16 | 1 | -2.92 | -0.67 | 0.12 | 0.71 | 1.94 | ▁▁▂▇▃▇▅▂ |
| numeric | SD_actb | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.4e-05 | 1.1e-06 | 1.3e-05 | 1.3e-05 | 1.4e-05 | 1.5e-05 | 1.5e-05 | ▇▁▁▁▁▁▁▇ |
| numeric | SD_gapdh | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 1.2e-06 | 9.8e-08 | 1.1e-06 | 1.1e-06 | 1.2e-06 | 1.3e-06 | 1.3e-06 | ▇▁▁▁▁▁▁▇ |
| numeric | SD_hprt1 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 4.4e-08 | 8.4e-09 | 3.5e-08 | 3.5e-08 | 4.4e-08 | 5.2e-08 | 5.2e-08 | ▇▁▁▁▁▁▁▇ |
| numeric | SD_rnapo2 | 0 | 36 | 36 | NA | NA | NA | NA | NA | NA | 9.4e-08 | 1.7e-08 | 7.7e-08 | 7.7e-08 | 9.4e-08 | 1.1e-07 | 1.1e-07 | ▇▁▁▁▁▁▁▇ |
library(cowplot) # a ggplot2 add-on to provide a publication-ready theme
# Make a dataframe containing the grand mean of mRNA quantity
gm <- df %>%
group_by(Ref_gene) %>%
summarize(gmean = mean(Quantity))
# Make dot charts showing individual expression profile
p1 <- df %>%
ggplot(aes(x = Sample_ID, y = Quantity, group = 1)) +
geom_point(aes(color = Diet)) +
geom_line() +
facet_wrap(~ Ref_gene, ncol = 1, scales = "free_y") +
labs(y = "mRNA quantity", tag = "A") +
scale_y_continuous(labels = scales::scientific) +
geom_hline(aes(yintercept = gmean), gm, color = "blue") + # The blue line shows the grand mean.
theme_bw()
# Make dot charts showing normalized expression levels of all the reference genes in one plot
p2 <- df %>%
ggplot(aes(x = Sample_ID, y = Quantity_nm, color = Ref_gene)) +
geom_point() +
geom_line(aes(group = Ref_gene)) +
labs(y = "normalized mRNA quantity", tag = "B") +
theme_bw() +
scale_fill_brewer(palette = "Dark2")
# Combine the plots
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(3, 2))
# Convert "Sample_ID"" to character, otherwise ggrepel won't work properly
df$Sample_ID <- as.character(df$Sample_ID)
# Define a function for identifying outliers
is_outlier <- function(x) {
x < quantile(x, 0.25) - 1.5 * IQR(x) | x > quantile(x, 0.75) + 1.5 * IQR(x)
}
# Make plots using standardized mRNA quantity
set.seed(1910)
df %>%
group_by(Ref_gene) %>%
mutate(outlier = is_outlier(Quantity_std)) %>%
ggplot(aes(x = Ref_gene, y = Quantity_std, label = ifelse(outlier, Sample_ID, NA))) +
geom_violin(trim = T) +
geom_boxplot(outlier.shape = NA, width = 0.5) +
geom_point(aes(fill = Diet, colour = Diet), size = 3, shape = 21, position = position_jitterdodge(0.2)) +
ggrepel::geom_text_repel(aes(colour = Diet), position = position_jitterdodge(0.2)) + # label outliers
ylab("standardized mRNA quantity") +
theme_cowplot() +
guides(colour = F)
df %>%
ggplot(aes(x = Diet, y = Mean, fill = Diet)) +
geom_bar(stat = "identity", position = position_dodge(), colour = "black") +
geom_errorbar(aes(ymin = Mean, ymax = Mean + SD), size = 0.3, width = 0.2) + # add error bar (sd)
facet_wrap(~ Ref_gene, nrow = 1, scales = "free_y") +
scale_y_continuous(limits = c(0, NA),
expand = expand_scale(mult = c(0, 0.1))) +
ylab("mRNA quantity") +
theme_cowplot()
Make a subset of data for correlation analysis
# Subset dataframe and convert to the wide format
df1 <- df %>%
select(Sample_ID, Ref_gene, Quantity) %>%
spread(key = "Ref_gene", value = "Quantity")
An excellent implementation of correlation matrix visualization is from the PerformanceAnalytics package.
library("PerformanceAnalytics")
chart.Correlation(df1[2:ncol(df1)], histogram = TRUE, pch = 19)
# Compute a correlation matrix
corr <- cor(df1[2:ncol(df1)], method = "pearson", use = "complete.obs")
# Make a correlation plot
library(corrplot)
corrplot.mixed(corr, order = "hclust", tl.col = "black")
# Make a numeric metrix for plotting heatmap
mat <- as.matrix(df1[2:ncol(df1)]) %>%
`row.names<-`(df1[[1]]) # Sample_ID as rownames
# Define color scheme
col <- colorRampPalette(c("blue", "white", "red"))(25)
library(gplots)
heatmap.2(mat,
Rowv = TRUE,
Colv = TRUE,
distfun = function(x) dist(x,method = 'euclidean'),
hclustfun = function(x) hclust(x,method = 'ward.D2'),
ylab = "Sample ID",
dendrogram = "column",
density.info = "none",
scale = "column",
trace = "none",
cexCol = 1,
cexRow = 0.5,
srtCol = 45,
col = col)
library(d3heatmap)
d3heatmap(mat,
Rowv = TRUE,
Colv = TRUE,
distfun = function(x) dist(x,method = 'euclidean'),
hclustfun = function(x) hclust(x,method = 'ward.D2'),
dendrogram = "column",
density.info = "none",
scale = "column",
trace = "none",
col = col)
Coefficient of variance (cv) is a measurement of overall stability of reference genes across all the samples. The smaller the CV is, the better.
cv <- df %>%
group_by(Ref_gene) %>%
summarize(Cq_max = max(Cq),
Cq_min = min(Cq),
Cq_range = max(Cq) - min(Cq),
Quantity_mean = mean(Quantity),
Quantity_SD = sd(Quantity),
Quantity_CV = 100 * Quantity_SD / Quantity_mean) %>%
mutate(CV_rank = dense_rank(Quantity_CV)) %>% # rank reference genes by CV
arrange(Ref_gene)
The F-statistic is a measurement of reference gene stability across experimental treatments. The smaller the F-statistic is, the better.
library(broom) # tidy statistical outputs
F_stat <- df %>%
group_by(Ref_gene) %>%
do(tidy(aov(Quantity ~ Diet, data = .))) %>% # run ANOVA and tidy statistical outputs
na.omit() %>%
select(Ref_gene, statistic) %>%
rename(Quantity_F = statistic) %>%
as.data.frame() %>%
mutate(F_rank = dense_rank(Quantity_F)) %>% # rank reference genes by F statistic
arrange(Ref_gene)
# Merge tables
smr <- full_join(cv, F_stat, by = "Ref_gene")
# Format digits
smr$Quantity_mean <- formatC(smr$Quantity_mean, format = "e", digits = 1)
smr$Quantity_SD <- formatC(smr$Quantity_SD, format = "e", digits = 2)
# Format table
kable(smr, format = "html", digits = 2, align = "l") %>%
kable_styling(bootstrap_options = c("striped", "hover"), font_size = 14, position = "left")
| Ref_gene | Cq_max | Cq_min | Cq_range | Quantity_mean | Quantity_SD | Quantity_CV | CV_rank | Quantity_F | F_rank |
|---|---|---|---|---|---|---|---|---|---|
| actb | 15.32 | 14.40 | 0.92 | 1.1e-04 | 1.42e-05 | 13.08 | 2 | 0.07 | 2 |
| gapdh | 18.73 | 17.79 | 0.94 | 9.0e-06 | 1.21e-06 | 13.33 | 3 | 0.58 | 4 |
| hprt1 | 23.11 | 21.59 | 1.52 | 2.1e-07 | 4.40e-08 | 21.00 | 4 | 0.37 | 3 |
| rnapo2 | 24.68 | 23.58 | 1.10 | 7.7e-07 | 9.37e-08 | 12.10 | 1 | 0.00 | 1 |
R version 3.5.2 (2018-12-20)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] broom_0.5.2 d3heatmap_0.6.1.2
[3] gplots_3.0.1.1 corrplot_0.84
[5] PerformanceAnalytics_1.5.2 xts_0.11-2
[7] zoo_1.8-5 cowplot_0.9.99
[9] kableExtra_1.1.0 skimr_1.0.5
[11] data.table_1.12.2 forcats_0.4.0
[13] stringr_1.4.0 dplyr_0.8.1
[15] purrr_0.3.2 readr_1.3.1
[17] tidyr_0.8.3 tibble_2.1.1
[19] ggplot2_3.1.1 tidyverse_1.2.1
[21] summarytools_0.9.3 knitr_1.23
[23] here_0.1
loaded via a namespace (and not attached):
[1] httr_1.4.0 jsonlite_1.6 viridisLite_0.3.0
[4] modelr_0.1.4 gtools_3.8.1 assertthat_0.2.1
[7] highr_0.8 pander_0.6.3 cellranger_1.1.0
[10] yaml_2.2.0 ggrepel_0.8.1 pillar_1.4.0
[13] backports_1.1.4 lattice_0.20-38 glue_1.3.1
[16] quadprog_1.5-7 digest_0.6.19 pryr_0.1.4
[19] checkmate_1.9.3 rvest_0.3.4 colorspace_1.4-1
[22] htmltools_0.3.6 plyr_1.8.4 pkgconfig_2.0.2
[25] haven_2.1.0 magick_2.0 scales_1.0.0
[28] webshot_0.5.1 gdata_2.18.0 generics_0.0.2
[31] withr_2.1.2 lazyeval_0.2.2 cli_1.1.0
[34] magrittr_1.5 crayon_1.3.4 readxl_1.3.1
[37] evaluate_0.13 nlme_3.1-137 xml2_1.2.0
[40] rapportools_1.0 tools_3.5.2 hms_0.4.2
[43] matrixStats_0.54.0 munsell_0.5.0 compiler_3.5.2
[46] caTools_1.17.1.2 rlang_0.3.4 grid_3.5.2
[49] RCurl_1.95-4.12 rstudioapi_0.10 htmlwidgets_1.3
[52] base64enc_0.1-3 bitops_1.0-6 tcltk_3.5.2
[55] labeling_0.3 rmarkdown_1.13 gtable_0.3.0
[58] codetools_0.2-15 R6_2.4.0 lubridate_1.7.4
[61] rprojroot_1.3-2 KernSmooth_2.23-15 stringi_1.4.3
[64] Rcpp_1.0.1 png_0.1-7 tidyselect_0.2.5
[67] xfun_0.7